Setup

Running comments to send AGRRA

  • date error in one of my fish transects
  • inconsistency in families between coral recruits and coral spp?

Benthic

Point cover

Data overview:

  • 2017-2022: “BenthicPointCoverScaledByTransect.xlsx”
    • scaled to not include non-reef substrate
    • cover is in percent format (X/100)
    • TAS and TAM included but not STA
    • some transect numbers include surveyor initials
  • 2025: “BenthicCoverScaledByTransect.xlsx”
    • scaled to not include non-reef substrate)
    • cover is in proportion format (X/1)
    • TAS/TAM/STA only included within tTA sheet (on “overall” sheet TA includes all forms of turf)
    • Little Bird Patch = Little Bird Backreef
  • Here “TAS” will refer to TAS + TAM (+ STA in 2025)

Import + wrangle data

# define variables to include (will remove CYAN and PEYS from some analyses)
benthic_var <- c("lc", "cca", "ta", "tas", "fma", "cma", "cyan", "ainv", "peys")


benthic_cover_trans <- 
  # historical data
  read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "BenthicPointCoverScaled", "BenthicPointCoverScaledByTransect.xlsx"), sheet = "Data") %>% 
  clean_names() %>%
  mutate(year = year(date),
         trans = as.character(trans), # to bind with 2025 NPA data with initials
         t_tas = tas + tam) %>% 
  select(site, year, trans, benthic_var) %>%
  mutate(across(benthic_var, ~ . / 100)) %>% # convert to proportion format
  # bind 2025 data
  bind_rows(
    bind_rows(
      # NEMMA
      read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "Overall") %>%
        clean_names() %>%
        mutate(`transect_name` = as.character(`transect_name`)) %>%  # to bind with 2025 NPA data with initials
        # incorporate TA sheet
        left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "tTA") %>%
                    clean_names() %>%
                    select(transect_id, ta, sta, tas, tam)),
      # NDNP
      read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "Overall") %>%
        clean_names() %>%
        # incorporate TA sheet
        left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoverScaledByTransect.xlsx"), sheet = "tTA") %>%
                    clean_names() %>%
                    select(transect_id, ta, sta, tas, tam))) %>%
      mutate(year = 2025,
             survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name),
             tas_sum = tas + sta + tam) %>%
      select(site = survey_name, year, trans = transect_name, lc = t_coral, cca = t_cca, ta, tas = tas_sum, fma = t_fma, cma = t_cma, cyan = t_cyan, ainv = t_ainv, peys = t_pey)) %>%
  mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA sites that have site numbers with different name variations afterwards
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA"))) %>%
  filter(year != 2022) %>% # 2022 was incomplete data collection year for NPA
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
  ungroup()

benthic_cover_site <- benthic_cover_trans %>%
  group_by(site, site_cat, year) %>%
  rename_with(~ str_remove(.x, "t_")) %>%
  summarise(across(c(benthic_var),
                   list(mean = ~ mean(.x, na.rm = TRUE),
                        sd   = ~ sd(.x)),
                   .names = "{.col}_{.fn}")) %>%
  ungroup()

benthic_cover_year <- benthic_cover_site %>%
  select(!contains("_sd")) %>%
  rename_with(~ str_remove(.x, "_mean")) %>%
  group_by(year, site_cat) %>%
  summarise(across(c(benthic_var),
                   list(mean = ~ mean(.x, na.rm = TRUE),
                        sd   = ~ sd(.x)),
                   .names = "{.col}_{.fn}")) %>%
  ungroup()

write.csv(benthic_cover_year, here("agrra_monitoring", "data_outputs", "benthic_cover_year.csv"))

Bar plots

kable(benthic_cover_year)
year site_cat lc_mean lc_sd cca_mean cca_sd ta_mean ta_sd tas_mean tas_sd fma_mean fma_sd cma_mean cma_sd cyan_mean cyan_sd ainv_mean ainv_sd peys_mean peys_sd
2017 NEMMA 0.1200074 0.0500671 0.1000121 0.0563351 0.1372388 0.0637481 0.1908383 0.1405502 0.2124538 0.0868144 0.0669921 0.0568517 0.0376893 0.0553593 0.0431588 0.0137899 0.0536530 0.0266282
2019 NDNP 0.0563083 0.0163313 0.0317857 0.0217454 0.0915333 0.1307974 0.5200929 0.1379033 0.1445060 0.0624581 0.0327048 0.0334719 0.0148095 0.0151115 0.0276940 0.0163452 0.0076321 0.0043003
2025 NDNP 0.0268379 0.0110233 0.0408207 0.0463093 0.0041743 0.0068380 0.4374636 0.1171587 0.3195486 0.0726502 0.0148298 0.0190004 0.0143074 0.0236870 0.0177629 0.0094187 0.0355738 0.0224526
2025 NEMMA 0.0422875 0.0308934 0.1540312 0.0875113 0.0435583 0.0454585 0.1662500 0.1147224 0.4294562 0.1503852 0.0386479 0.0361970 0.0168750 0.0210095 0.0237000 0.0283759 0.0551625 0.0429563
benthic_2025 <- benthic_cover_year %>%
  filter(year == 2025) %>%
  select(
    site_cat,
    lc_mean, lc_sd,
    cca_mean, cca_sd,
    ta_mean, ta_sd,
    tas_mean, tas_sd,
    fma_mean, fma_sd,
    cma_mean, cma_sd,
    ainv_mean, ainv_sd
  ) %>%
  pivot_longer(
    cols = -site_cat,                    # exclude site_cat
    names_to = c("metric", "stat"),
    names_sep = "_",
    values_to = "value"
  ) %>%
  pivot_wider(
    names_from = stat,
    values_from = value
  ) %>%
  mutate(
    metric = recode(
      metric,
      lc    = "Live coral",
      cca   = "CCA",
      ta    = "Turf algae",
      tas   = "Turf + sediment",
      fma   = "Fleshy macroalgae",
      cma   = "Crustose macroalgae",
      ainv  = "Other inverts"
    ),
    metric = factor(metric, levels = c(
      "Live coral",
      "CCA",
      "Crustose macroalgae",
      "Turf algae",
      "Turf + sediment",
      "Fleshy macroalgae",
      "Other inverts"
    ))
  )


ggplot(benthic_2025, aes(x = metric, y = mean, fill = metric)) +
  geom_col(
    position = position_dodge(width = 0.8),
    width = 0.7,
    color = "black",
    fill = "gray"
  ) +
  geom_errorbar(
    aes(ymin = mean - sd, ymax = mean + sd),
    width = 0.2,
    position = position_dodge(width = 0.8)
  ) +
  facet_wrap(~ site_cat) +
  scale_y_continuous(
    labels = scales::percent_format(accuracy = 1),
    expand = expansion(mult = c(0, 0.05))
  ) +
  labs(
    x = NULL,
    y = "Mean benthic cover (2025)"
  ) +
  theme_bw() +
  theme(
    panel.grid.major.x = element_blank(),
    panel.grid.minor = element_blank(),
    strip.background = element_blank(),
    strip.text = element_text(face = "bold"),
    axis.text.x = element_text(angle = 45, hjust = 1),
    legend.position = "none"
  )

ggsave(here("agrra_monitoring", "figs", "benthic_sum.png"), width = , height = 6)

Line plots by site

make_lp <- function(df, yvar, ylab) {
  
  p <- ggplot(df, aes(x = year, y = {{ yvar }}, group = site, shape = site)) +
  geom_point() +
  scale_shape_manual(values = 0:35) +
  geom_line() +
  scale_y_continuous(labels = label_percent()) +
  labs(x = "Year", y = ylab, color = "Site", group = "Site") +
  theme_minimal()
  
  p
}

make_lp(df = benthic_cover_site, lc_mean, "Live coral cover")
make_lp(df = benthic_cover_site, fma_mean, "FMA cover")
make_lp(df = benthic_cover_site, ta_mean, "TA cover")
make_lp(df = benthic_cover_site, tas_mean, "TA-sediment cover")

Delta plots

# preparing data - generating deltas for each metric/site_cat

benthic_cover_site_chg <- benthic_cover_site %>%
  group_by(site_cat) %>%
  mutate(
    timepoint = if_else(year == min(year), "baseline", "2025")
  ) %>%
  ungroup() %>%
  pivot_wider(
    id_cols = c(site, site_cat),
    names_from = timepoint,
    values_from = c(lc_mean, cca_mean, ta_mean, tas_mean, fma_mean, cma_mean, ainv_mean),
    names_sep = "_"
  ) %>%
  mutate(
    d_lc  = lc_mean_2025  - lc_mean_baseline,
    d_cca = cca_mean_2025 - cca_mean_baseline,
    d_ta  = ta_mean_2025  - ta_mean_baseline,
    d_tas = tas_mean_2025 - tas_mean_baseline,
    d_fma = fma_mean_2025 - fma_mean_baseline,
    d_cma = cma_mean_2025 - cma_mean_baseline,
    d_ainv = ainv_mean_2025 - ainv_mean_baseline
  ) %>%
  pivot_longer(
    cols = starts_with("d_"),
    names_to = "metric",
    values_to = "delta"
  ) %>%
  mutate(
    metric = recode(metric,
      d_lc  = "Live coral",
      d_cca = "CCA",
      d_ta  = "Turf algae",
      d_tas = "Turf + sediment",
      d_fma = "Fleshy macroalgae",
      d_cma = "Crustose macroalgae",
      d_ainv = "Agg. invertebrates"
    )
  ) %>%
  select(site, site_cat, metric, delta) %>%
  mutate(delta_pct = delta * 100)

# generating stats table

# Function to compute bootstrapped mean CI
boot_mean_ci <- function(x, n_boot = 1000, conf = 0.95) {
  # Boot function
  boot_fn <- function(data, indices) mean(data[indices])
  
  b <- boot::boot(x, statistic = boot_fn, R = n_boot)
  
  ci <- boot::boot.ci(b, type = "perc")$percent[4:5]  # lower, upper
  return(ci)
}

delta_table <- benthic_cover_site_chg %>%
  group_by(site_cat, metric) %>%
  summarise(
    mean_delta = mean(delta_pct, na.rm = TRUE),
    ci = list(boot_mean_ci(delta_pct)),
    wilcox_p = wilcox.test(delta_pct, mu = 0)$p.value,
    .groups = "drop"
  ) %>%
  mutate(
    ci_lower = sapply(ci, `[[`, 1),
    ci_upper = sapply(ci, `[[`, 2)
  ) %>%
  select(-ci) %>%
  mutate(
    mean_delta = round(mean_delta, 2),
    ci_lower = round(ci_lower, 2),
    ci_upper = round(ci_upper, 2),
    wilcox_p = signif(wilcox_p, 3)
  ) %>%
  arrange(metric, site_cat)

kable(delta_table)
site_cat metric mean_delta wilcox_p ci_lower ci_upper
NDNP Agg. invertebrates -0.99 0.153000 -2.07 0.14
NEMMA Agg. invertebrates -1.95 0.148000 -3.50 0.03
NDNP CCA 0.90 0.463000 -0.60 2.82
NEMMA CCA 5.40 0.313000 -1.56 12.79
NDNP Crustose macroalgae -1.79 0.003050 -3.19 -0.70
NEMMA Crustose macroalgae -2.83 0.109000 -5.87 0.16
NDNP Fleshy macroalgae 17.50 0.000122 12.47 22.57
NEMMA Fleshy macroalgae 21.70 0.007810 13.42 29.81
NDNP Live coral -2.95 0.002320 -4.16 -1.65
NEMMA Live coral -7.77 0.015600 -11.27 -3.82
NDNP Turf + sediment -8.26 0.041900 -15.29 -0.94
NEMMA Turf + sediment -2.46 1.000000 -12.56 5.09
NDNP Turf algae -8.74 0.000610 -16.30 -3.67
NEMMA Turf algae -9.37 0.023400 -14.87 -2.79
# plotting - all metrics

ggplot(delta_table,
       aes(x = mean_delta, y = metric)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "grey50") +
  geom_errorbarh(
    aes(xmin = ci_lower, xmax = ci_upper),
    height = 0.2
  ) +
  geom_point(size = 3) +
  facet_wrap(~ site_cat, scales = "free_y") +
  labs(
    x = "Mean change (Δ)",
    y = NULL
  ) +
  theme_bw() +
  theme(
    strip.background = element_blank(),
    strip.text = element_text(face = "bold"),
    panel.grid.major.y = element_blank()
  )

ggsave(here("agrra_monitoring", "figs", "benthic_delta.png"), width = 6, height = 6)


# plotting - indv. metrics

plot_delta <- function(df, metric_name, y_label = NULL) {
  
  df_metric <- df %>% filter(metric == metric_name)
  
  if (is.null(y_label)) {
    y_label <- paste0("Change in percent ", metric_name, " cover")
  }
  
  # Compute symmetric limits around zero
  max_abs <- max(abs(df_metric$delta_pct), na.rm = TRUE)
  y_limits <- c(-max_abs, max_abs)
  
  ggplot(df_metric, aes(x = site_cat, y = delta_pct)) +
    geom_hline(yintercept = 0, linetype = "dashed", color = "grey50") +
    geom_quasirandom(alpha = 0.5, size = 2, width = 0.2) +
    stat_summary(fun.data = mean_cl_boot, geom = "errorbar", width = 0.2, size = 0.8) +
    stat_summary(fun = mean, geom = "point", size = 3, color = "black") +
    theme_classic() +
    coord_flip() +
    labs(x = "", y = y_label) +
    ylim(y_limits)   # <- this line sets symmetric y-axis
}


p_fma <- plot_delta(benthic_cover_site_chg, "Fleshy macroalgae")
p_lc <- plot_delta(benthic_cover_site_chg, "Live coral")

p_delta_indv <- p_fma / p_lc  # patchwork uses / for vertical layout
ggsave(here("agrra_monitoring", "figs", "benthic_delta_indv.png"), plot = p_delta_indv, width = 6, height = 8)

print(p_delta_indv)

Box plots by year

make_bp <- function(df, site_name, yvar, ylab,
                          y_max = NULL, test_method = "wilcox.test",
                          show_ns = FALSE, remove_x_text = FALSE,
                          title = TRUE, remove_y_text = FALSE) {

  df_sub <- df %>%
    filter(site_cat == site_name) %>%
    mutate(year = factor(as.character(year), levels = c("2017", "2019", "2025")))

  yrs <- sort(unique(as.character(df_sub$year)))
  comps <- if (length(yrs) >= 2) combn(yrs, 2, simplify = FALSE) else list()

  yvar_sym <- rlang::sym(yvar)

  p <- ggplot(df_sub, aes(x = year, y = !!yvar_sym)) +
    geom_boxplot(outlier.size = 1, width = 0.6) +
    geom_jitter(width = 0.15, size = 1, alpha = 0.6) +
    scale_y_continuous(labels = label_percent()) +
    labs(x = "Year", 
         y = if (!remove_y_text) ylab else NULL,
         title = if (title) site_name else NULL) +
    theme_pub() +
    theme(plot.title = element_text(size = 14, face = "bold", hjust = 0.5))

  if (remove_y_text) {
    p <- p + theme(
      axis.text.y = element_blank(),
      axis.ticks.y = element_blank(),
      axis.title.y = element_blank()
    )
  }

  if (remove_x_text) {
    p <- p + theme(
      axis.text.x = element_blank(),
      axis.ticks.x = element_blank(),
      axis.title.x = element_blank()
    )
  }

  if (!is.null(y_max)) {
    p <- p + coord_cartesian(ylim = c(0, y_max))
  }

  if (length(comps) > 0) {
    p <- p + stat_compare_means(
      method = test_method,
      comparisons = comps,
      label = "p.signif",
      hide.ns = !show_ns,
      vjust = 0)
  }

  p
}


p1 <- make_bp(benthic_cover_site, "NEMMA",
              yvar = "lc_mean", ylab = "Live coral",
              y_max = 0.30,
              remove_x_text = TRUE,
              title = TRUE,
              remove_y_text = FALSE)

p2 <- make_bp(benthic_cover_site, "NDNP",
              yvar = "lc_mean",
              y_max = 0.30,
              remove_x_text = TRUE,
              title = TRUE,
              remove_y_text = TRUE)

p3 <- make_bp(benthic_cover_site, "NEMMA",
              yvar = "fma_mean", ylab = "Fleshy macroalgae",
              y_max = 0.80,
              title = FALSE,
              remove_y_text = FALSE)

p4 <- make_bp(benthic_cover_site, "NDNP",
              yvar = "fma_mean",
              y_max = 0.80,
              title = FALSE,
              remove_y_text = TRUE)

final_plot <- (p1 | p2) / (p3 | p4)
final_plot

ggsave(here("agrra_monitoring", "figs", "lc_fma_cover.png"), plot = final_plot, width = 8, height = 6)

Radar plots

make_rc <- function(df, site_cat, title = NULL,
                            line_colors = c("coral", "skyblue4")) {

  # Filter the site
  df_site <- df %>%
    filter(site_cat == site_cat)

  # Extract mean columns only
  mean_cols <- df_site %>% select(contains("_mean"))

  # Compute max PC across all mean vars
  max_pc <- mean_cols %>% unlist() %>% max(na.rm = TRUE)

  # Count mean columns
  ncol_pc <- ncol(mean_cols)

  # Make radar data
  benthic_rdr <- rbind(
    rep(max_pc, ncol_pc),    # max row
    rep(0, ncol_pc),         # min row
    mean_cols %>% rename_with(~ gsub("_mean", "", .x))
  )

  # ---- FIX: convert years to character before make.unique() ----
  rownames(benthic_rdr) <- c(
    "max",
    "min",
    make.unique(as.character(df_site$year), sep = "_")
  )

  # Colors
  fill_colors <- alpha(line_colors, 0.5)

  # ---- Plot ----
  radarchart(
    benthic_rdr,
    pcol  = line_colors,
    pfcol = fill_colors,
    cglcol = "grey",
    title = ifelse(is.null(title), site_cat, title)
  )

  # ---- Legend ----
  legend(
    "topright",
    legend = rownames(benthic_rdr)[3:nrow(benthic_rdr)],
    col = line_colors,
    fill = fill_colors,
    lty = 1,
    lwd = 2,
    bty = "n",
    cex = 0.8
  )
}


make_rc(benthic_cover_year, site_cat = "NEMMA")

NEMMA

max_pc <- benthic_cover_year %>%
  filter(site_cat == "NEMMA") %>%
  select(contains("_mean")) %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol_pc <- ncol(benthic_cover_year %>%
                  filter(site_cat == "NEMMA") %>%
                  select(contains("_mean")))

benthic_rdr <- 
  rbind(rep(max_pc, ncol_pc), 
        rep(0, ncol_pc), 
        benthic_cover_year %>%
          filter(site_cat == "NEMMA") %>%
          column_to_rownames(var = "year") %>%
          select(contains("_mean")) %>%
          rename_with(~ gsub("_mean", "", .x))
        )
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(benthic_rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

legend(
  x = "topright",
  legend = rownames(benthic_rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",  
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors 
)

NDNP

max_pc <- benthic_cover_year %>%
  filter(site_cat == "NDNP") %>%
  select(contains("_mean")) %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol_pc <- ncol(benthic_cover_year %>%
                  filter(site_cat == "NDNP") %>%
                  select(contains("_mean")))


benthic_rdr <- 
  rbind(rep(max_pc, ncol_pc), 
        rep(0, ncol_pc), 
        benthic_cover_year %>%
          filter(site_cat == "NDNP") %>%
          column_to_rownames(var = "year") %>%
          select(contains("_mean")) %>%
          rename_with(~ gsub("_mean", "", .x))
        )
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(benthic_rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
  x = "topright",
  legend = rownames(benthic_rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

NMDS

Removing cyanobacteria here due to seasonality and peysonnelids in NDNP due to lack in confidence about identification

nmds_var <- c("lc", "cca", "ta", "tas", "fma", "cma", "ainv")

benthic_nmds <- benthic_cover_site %>%
  rename_with(~ gsub("_mean", "", .x)) %>%
  mutate(period = ifelse(year == "2025", "2025", "2017-2019")) %>%
  select(site, site_cat, period, nmds_var) %>%
  ungroup()

benthic_meta <- benthic_nmds %>%
  ungroup() %>%
  select(site, site_cat, period)

benthic_pc <- benthic_nmds %>%
  ungroup() %>%
  select(-site, -site_cat, -period)

set.seed(123)
benthic_nmds_ord <- metaMDS(
  benthic_pc,
  distance = "bray",
  k = 2,
  trymax = 100,
  trace = 0
)

site_scores <- scores(benthic_nmds_ord, display = "sites") %>%
  as.data.frame() %>%
  mutate(site     = benthic_meta$site,
         site_cat = benthic_meta$site_cat,
         period     = benthic_meta$period)

pc_scores <- scores(benthic_nmds_ord, display = "species") %>%
  as.data.frame() %>%
  rownames_to_column(var = "category")

arrow_mult <- 2

pc_scores_scaled <- pc_scores %>%
  mutate(
    NMDS1 = NMDS1 * arrow_mult,
    NMDS2 = NMDS2 * arrow_mult,
    label_nudge_x = ifelse(NMDS1 >= 0, 0.05, -0.05),
    label_nudge_y = ifelse(NMDS2 >= 0, 0.05, -0.05)
  )

p <- ggplot(site_scores, aes(x = NMDS1, y = NMDS2, color = period)) +
  geom_point(size = 1, alpha = 0.4) +
  stat_ellipse(aes(group = period), type = "t", linetype = 2) +
  facet_wrap(~ site_cat) +
  # excluding arrows in publication version
  # geom_segment(data = pc_scores_scaled,
  #              aes(x = 0, y = 0, xend = NMDS1, yend = NMDS2),
  #              arrow = arrow(length = unit(0.3, "cm")),
  #              color = "black",
  #              inherit.aes = FALSE) +
  # geom_text(data = pc_scores_scaled,
  #           aes(x = NMDS1 + label_nudge_x,
  #               y = NMDS2 + label_nudge_y,
  #               label = category),
  #           color = "black", size = 3, inherit.aes = FALSE) +
  theme_pub() +
  labs(color = "Year") +
  theme(strip.text = element_text(size = 14, face = "bold"),
        legend.position = "bottom")
p

ggsave(plot = p, here("agrra_monitoring", "figs", "benthic_nmds.png"), width = 8, height = 5)
# PERMANOVA

### need to fix code so it distinguishes between year and site_cat in the model
set.seed(123)
adonis_res <- adonis2(
  as.matrix(benthic_nmds[, nmds_var]) ~ site_cat + period,
  data = benthic_nmds,
  permutations = 999,
  method = "bray"
)

permanova_table <- adonis_res %>%
  as.data.frame() %>%
  rownames_to_column("Factor") %>%
  filter(Factor != "Residual") %>%     # usually not shown
  select(Factor, Df, R2, F, `Pr(>F)`) %>%
  rename(p = `Pr(>F)`) %>%
  mutate(across(where(is.numeric), ~ round(.x, 3)))
kable(permanova_table)
Factor Df R2 F p
Model 2 0.539 23.985 0.001
Total 43 1.000 NA NA
ft <- flextable(permanova_table) |>
  autofit()

read_docx() |>
  body_add_flextable(ft) |>
  print(target = here("agrra_monitoring", "figs", "benthic_nmds_permanova.docx"))
# envfit

full_names <- c(
  lc  = "Live coral",
  ta  = "Turf algae",
  tas = "Turf algae – sediment",
  fma = "Fleshy macroalgae",
  cma = "Calcareous macroalgae",
  ainv = "Aggressive invertebrates",
  cca  = "Crustose coralline algae"
)

env_table <- envfit(benthic_nmds_ord, benthic_pc, permutations = 999) %>%
  (\(.) {
    arrows <- as.data.frame(.$vectors$arrows) %>%
      rownames_to_column("code")

    stats <- tibble(
      code = rownames(.$vectors$arrows),
      r2 = .$vectors$r,
      p = .$vectors$pvals
    )

    arrows %>%
      left_join(stats, by = "code") %>%
      mutate(
        category = full_names[code]
      ) %>%
      select(category, code, NMDS1, NMDS2, r2, p)
  })() %>%
  filter(p < 0.05) %>% # keep only significant variables
  arrange(p, desc(r2)) %>% # order by sig, then by r2
  mutate(across(
    where(is.numeric),
    ~ round(.x, 3)
  ))

# export table
library(gt)

gt_table <- env_table %>%
  select(-code) %>%   # remove code column
  gt() %>%
  fmt_number(
    columns = where(is.numeric),
    decimals = 3
  ) %>%
  cols_align(
    align = "center",
    columns = everything()
  ) %>%
  cols_label(
    category = "Category",
    NMDS1 = "NMDS1",
    NMDS2 = "NMDS2",
    r2 = "R²",
    p = "p-value"
  )

kable(env_table)
category code NMDS1 NMDS2 r2 p
Turf algae – sediment tas -0.997 0.080 0.979 0.001
Fleshy macroalgae fma 0.497 0.868 0.947 0.001
Turf algae ta 0.326 -0.945 0.701 0.001
Crustose coralline algae cca 0.941 -0.338 0.455 0.001
Live coral lc 0.385 -0.923 0.431 0.001
gtsave(gt_table, here("agrra_monitoring", "figs", "benthic_nmds_envfit.docx"))
#### NEMMA only
benthic_nmds <- benthic_cover_site %>%
  filter(site_cat == "NEMMA") %>%
  select(-site_cat, 
         -contains("_sd"), 
         -contains("cyan"),
         -contains("peys")) %>%
  rename_with(~ gsub("_mean", "", .x)) %>%
  mutate(year = as.character(year))

# Separate metadata and community data
benthic_meta <- benthic_nmds %>% ungroup %>% select(site, year)
benthic_pc   <- benthic_nmds %>% ungroup %>% select(-site, -year)

# Run NMDS
set.seed(123) # fixes random number generator so results are reproducible
benthic_nmds_ord <- metaMDS(benthic_pc, distance = "bray", k = 2, trymax = 100, trace = 0)

# Extract NMDS scores and add metadata
site_scores <- scores(benthic_nmds_ord, display = "sites") %>%
  as.data.frame() %>%
  mutate(site = benthic_meta$site,
         year = benthic_meta$year)

# Extract pc (community variable) scores and add metadata
pc_scores <- scores(benthic_nmds_ord, display = "species") %>%
  as.data.frame() %>%
  rownames_to_column(var = "category")

# Plot NMDS

## defining NMDS arrows
arrow_mult <- 2
pc_scores_scaled <- pc_scores %>%
  mutate(NMDS1 = NMDS1 * arrow_mult,
         NMDS2 = NMDS2 * arrow_mult)

## adding offset columns to move arrow labels
pc_scores_scaled <- pc_scores_scaled %>%
  mutate(
    label_nudge_x = ifelse(NMDS1 >= 0, 0.05, -0.05),
    label_nudge_y = ifelse(NMDS2 >= 0, 0.05, -0.05)
  )

## plot
ggplot(site_scores, aes(x = NMDS1, y = NMDS2, color = year)) +
  geom_point(size = 4) +
  scale_shape_manual(values = 0:25) +
  stat_ellipse(aes(group = year), type = "t", linetype = 2) +
  geom_segment(data = pc_scores_scaled,
               aes(x = 0, y = 0, xend = NMDS1, yend = NMDS2),
               arrow = arrow(length = unit(0.3, "cm")),
               color = "black",
               inherit.aes = FALSE) +
  geom_text(data = pc_scores_scaled,
            aes(x = NMDS1 + label_nudge_x,
                y = NMDS2 + label_nudge_y,
                label = category),
            color = "black",
            inherit.aes = FALSE) +
  theme_minimal()

# PERMANOVA (adonis2)
set.seed(123)
adonis_res <- adonis2(benthic_pc ~ year, 
                      data = benthic_meta, 
                      method = "bray", 
                      permutations = 999)

print(adonis_res)
#### NDNP only
benthic_nmds <- benthic_cover_site %>%
  filter(site_cat == "NDNP") %>%
  select(-site_cat, 
         -contains("_sd"), 
         -contains("cyan"),
         -contains("peys")) %>%
  rename_with(~ gsub("_mean", "", .x)) %>%
  mutate(year = as.character(year))

# Separate metadata and community data
benthic_meta <- benthic_nmds %>% ungroup %>% select(site, year)
benthic_pc   <- benthic_nmds %>% ungroup %>% select(-site, -year)

# Run NMDS
set.seed(123) # fixes random number generator so results are reproducible
benthic_nmds_ord <- metaMDS(benthic_pc, distance = "bray", k = 2, trymax = 100, trace = 0)

# Extract NMDS scores and add metadata
site_scores <- scores(benthic_nmds_ord, display = "sites") %>%
  as.data.frame() %>%
  mutate(site = benthic_meta$site,
         year = benthic_meta$year)

# Extract pc (community variable) scores and add metadata
pc_scores <- scores(benthic_nmds_ord, display = "species") %>%
  as.data.frame() %>%
  rownames_to_column(var = "category")

# Plot NMDS

## defining NMDS arrows
arrow_mult <- 2
pc_scores_scaled <- pc_scores %>%
  mutate(NMDS1 = NMDS1 * arrow_mult,
         NMDS2 = NMDS2 * arrow_mult)

## adding offset columns to move arrow labels
pc_scores_scaled <- pc_scores_scaled %>%
  mutate(
    label_nudge_x = ifelse(NMDS1 >= 0, 0.05, -0.05),
    label_nudge_y = ifelse(NMDS2 >= 0, 0.05, -0.05)
  )

## plot
ggplot(site_scores, aes(x = NMDS1, y = NMDS2, color = year)) +
  geom_point(size = 4) +
  stat_ellipse(aes(group = year), type = "t", linetype = 2) +
  geom_segment(data = pc_scores_scaled,
               aes(x = 0, y = 0, xend = NMDS1, yend = NMDS2),
               arrow = arrow(length = unit(0.3, "cm")),
               color = "black",
               inherit.aes = FALSE) +
  geom_text(data = pc_scores_scaled,
            aes(x = NMDS1 + label_nudge_x,
                y = NMDS2 + label_nudge_y,
                label = category),
            color = "black",
            inherit.aes = FALSE) +
  theme_minimal()

# PERMANOVA (adonis2)
set.seed(123)
adonis_res <- adonis2(benthic_pc ~ year, 
                      data = benthic_meta, 
                      method = "bray", 
                      permutations = 999)

print(adonis_res)

Site 12 is skewing this quite a bit - but don’t see reason to omit?

Coral species composition

Data overview:

  • 2017: “CoralCoverSpeciesScaledBySite”
    • One sheet (‘Data’) with percent cover by species
  • 2025: BenthicCoralCoverScaledByTransect.xlsx”
    • Separate sheets per family give percent cover by species - inconvenient format, need to bind all together

Import + wrangle data

# metadata for species - family matching
coral_meta <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoralCoverScaled.xlsx"), sheet = "Metadata") %>% 
  clean_names() %>%
  fill(scaled_coral_cover, column_header) 

coral_meta_fam <- coral_meta %>%
  filter(str_starts(column_header, "t") | column_header == "UNKN") %>%
  select(family_code = column_header, family = definition) %>%
  mutate(family = gsub("\\s*\\([^\\)]+\\)", "", family)) %>% # removing fluff
  distinct() # removing UNKN duplicate

coral_meta_spp <- coral_meta %>%
  filter(str_starts(scaled_coral_cover, "t") | scaled_coral_cover == "UNKN") %>%
  select(species_code = column_header, family_code = scaled_coral_cover, species = definition) %>%
  left_join(coral_meta_fam, by = "family_code") %>%
  mutate(genus = sub(" .*", "", species),
         family_code = str_remove(family_code, "^t"),
         across(c(species, family), ~ str_replace(., "Unknown sp. of Live Coral", "Unknown spp.")))

# historical data
coral_spp_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "CoralCoverSpeciesScaled", "CoralCoverSpeciesScaledBySite.xlsx"), sheet = "Data") %>% 
  rename_with(fix_names) %>%
  mutate(year = year(date)) %>%
  filter(year %in% c(2017, 2019)) %>% # 2022 was incomplete NPA year
  pivot_longer(
    cols = matches("^[a-z]{4}_(avg|std)$"),   # all columns like acer_ave, apal_std, etc.
    names_to = c("species_code", ".value"),        # split into species and measurement type
    names_sep = "_"
  ) %>%
  mutate(across(c(avg, std), ~ .x / 100)) %>% # to match 2025 format (fraction of 1)
  select(site, year, species_code, avg, std) 

# 2025 data
file_coral_nemma <- here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoralCoverScaled.xlsx")
file_coral_npa <- here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoralCoverScaled.xlsx")

clean_excel_coral <- function(file_path) {
  sheets <- excel_sheets(file_path)
  sheets_to_join <- setdiff(sheets, c("TermsOfUse", "Metadata", "Overall"))  # exclude

  sheets_list <- map(sheets_to_join, ~ read_excel(file_path, sheet = .x) %>%
                       rename_with(~ gsub("^(t)([A-Za-z]{4})(avg|med|std)$", "\\1_\\2_\\3", .x, ignore.case = TRUE)) %>%
                       rename_with(~ gsub("([A-Za-z]{4})(avg|med|std)$", "\\1_\\2", .x, ignore.case = TRUE)) %>%
                       clean_names())

  reduce(sheets_list, left_join, by = c("survey_id", "code", "name", "nt"))
}

coral_spp_2025 <- clean_excel_coral(file_coral_npa) %>%
  bind_rows(clean_excel_coral(file_coral_nemma)) %>%
  mutate(year = 2025) %>%
  rename(site = name) %>%
  pivot_longer(
    cols = matches("^[a-z]{4}_(avg|std)$"),   # all columns like acer_ave, apal_std, etc.
    names_to = c("species_code", ".value"),        # split into species and measurement type
    names_sep = "_"
  ) %>%
  select(site, year, species_code, avg, std)

# merge for complete df
coral_spp_site <- bind_rows(coral_spp_hist, coral_spp_2025) %>%
  mutate(site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA")),
         site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site)) %>%
  mutate(species_code = toupper(species_code),
         site = if_else(site == "Little Bird Patch", "Little Bird Backreef", site)) %>%
  left_join(coral_meta_spp, by = "species_code")  %>%
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
  ungroup()

### need to add 0s when nothing was present
coral_fam_site_year <- coral_spp_site %>%
  group_by(year, site_cat, site, family) %>%
  summarize(mean = mean(avg), 
            sd = sd(avg)) %>%
  group_by(year, site_cat, site) %>%
  mutate(rel_mean = mean / sum(mean)) %>%
  group_by(year, site_cat, family) %>%
  filter(sum(mean) > 0) %>% # removing species that were 0 in all years/site_cats
  ungroup()
chk <- coral_fam_site_year %>%
  group_by(year, site_cat, site) %>%
  summarize(tot = sum(rel_mean))

coral_spp_year <- coral_spp_site %>%
  group_by(year, site_cat, family, genus, species_code, species) %>%
  summarize(mean = mean(avg), 
            sd = sd(avg)) %>%
  group_by(year, site_cat) %>%
  mutate(rel_mean = mean / sum(mean)) %>%
  group_by(year, site_cat, species) %>%
  filter(sum(mean) > 0) %>% # removing species that were 0 in all years/site_cats
  ungroup()

coral_fam_year <- coral_spp_year %>%
  group_by(year, site_cat, family) %>%
  summarize(mean = sum(mean)) %>%
  group_by(year, site_cat) %>%
  mutate(rel_mean = mean/sum(mean)) %>%
  group_by(year, site_cat) %>%
  complete(
    family = unique(coral_spp_year$family),
    fill = list(mean = 0, rel_mean = 0)
  ) %>%
  ungroup()
chk <- coral_fam_year %>%
  group_by(year, site_cat) %>%
  summarize(tot = sum(rel_mean))

coral_fam_year_wide <- coral_fam_year %>%
  pivot_wider(names_from = family,
              values_from = c(mean, rel_mean),
              names_glue = "{family}_{.value}")

Line plots

coral_fams <- c("Acroporidae", "Merulinidae", "Milleporidae", "Montastraeidae",  "Poritidae", "Siderastreidae")
ggplot(coral_fam_site_year %>%
         filter(family %in% coral_fams),
       aes(x = year, y = rel_mean, group = site)) +
  geom_point() +
  geom_line() + 
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Year", y = "Relative percent cover of live coral") +
  facet_grid(site_cat ~ family) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(coral_fam_site_year %>%
         filter(family %in% coral_fams),
       aes(x = year, y = mean, group = site)) +
  geom_point() +
  geom_line() + 
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Year", y = "Percent cover") +
  facet_grid(site_cat ~ family) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(coral_fam_year, aes(x = year, y = mean, group = site)) +
  geom_point() +
  geom_line() + 
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Year", y = "Mean percent cover", color = "Location") +
  facet_grid(site_cat ~ family) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(coral_fam_year, aes(x = year, y = rel_mean, group = site_cat, color = site_cat)) +
  geom_point() +
  geom_line() + 
  scale_y_continuous(labels = percent_format()) +
  labs(x = "Year", y = "Relative proportion", color = "Location") +
  facet_wrap(~ family) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Radar plots

NEMMA

max_pc <- coral_fam_year_wide %>%
  filter(site_cat == "NEMMA") %>%
  select(contains("_mean") & 
           !contains("rel_mean") & 
           !contains("Unknown spp.") &
           where(~ any(. >= 0.001))) %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol_pc <- ncol(coral_fam_year_wide %>%
                  filter(site_cat == "NEMMA") %>%
                  select(contains("_mean") & 
                           !contains("rel_mean") & 
                           !contains("Unknown spp.") &
                           where(~ any(. >= 0.001)))
)

coral_rdr <- 
  rbind(rep(max_pc, ncol_pc), 
        rep(0, ncol_pc), 
        coral_fam_year_wide %>%
          filter(site_cat == "NEMMA") %>%
          column_to_rownames(var = "year") %>%
          select(contains("_mean") & 
                   !contains("rel_mean") & 
                   !contains("Unknown spp.") &
                   where(~ any(. >= 0.001))) %>%
          rename_with(~ gsub("_mean", "", .x))
        )
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

png("/Users/margaretwilson/Github/emc/agrra_monitoring/figs/coral_radar_NEMMA.png", width = 2000, height = 2000, res = 300)

# --- draw radar chart ---
radarchart(
  coral_rdr,
  pcol = line_colors,
  pfcol = fill_colors,
  cglcol = "grey"
)

# --- add legend ---
legend(
  x = "topright",
  legend = rownames(coral_rdr)[3:4],
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors
)

dev.off()
## quartz_off_screen 
##                 2
# radarchart(coral_rdr,
#            pcol = line_colors,
#            pfcol = fill_colors,
#            cglcol = "grey")
# 
# # doesn't like adding legend within rmarkdown - but can try within knit document
# legend(
#   x = "topright",
#   legend = rownames(coral_rdr)[3:4],  # skip max/min rows
#   col = line_colors,
#   lty = 1,
#   lwd = 2,
#   bty = "n",                   # no box
#   cex = 0.8,
#   pt.cex = 1.5,
#   fill = fill_colors            # show fill colors in legend
# )

NDNP

max_pc <- coral_fam_year_wide %>%
  filter(site_cat == "NDNP") %>%
  select(contains("_mean") & 
           !contains("rel_mean") & 
           !contains("Unknown spp.") &
           where(~ any(. >= 0.001))) %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol_pc <- ncol(coral_fam_year_wide %>%
                  filter(site_cat == "NDNP") %>%
                  select(contains("_mean") & 
                           !contains("rel_mean") & 
                           !contains("Unknown spp.") &
                           where(~ any(. >= 0.001)))
)

coral_rdr <- 
  rbind(rep(max_pc, ncol_pc), 
        rep(0, ncol_pc), 
        coral_fam_year_wide %>%
          filter(site_cat == "NDNP") %>%
          column_to_rownames(var = "year") %>%
          select(contains("_mean") & 
                   !contains("rel_mean") & 
                   !contains("Unknown spp.") &
                   where(~ any(. >= 0.001))) %>%
          rename_with(~ gsub("_mean", "", .x))
        )
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(coral_rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
  x = "topright",
  legend = rownames(coral_rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

Forest plots

Probably would show only NEMMA

# calculating paired changes per site, and 95% confidence intervals
coral_fam_site <- coral_spp_site %>%
  group_by(year, site_cat, site, family) %>%
  summarize(avg = mean(avg)) %>%
  filter(!family %in% c("Unknown spp.", "Scleractinia incertae sedis"))

# --- compute paired deltas per site, then summarize per family × site_cat ---
df_change <- coral_fam_site %>%
  filter(year %in% c(2017, 2019, 2025)) %>%
  # pivot so each site has avg_2017, avg_2019, avg_2025 (NA where missing)
  pivot_wider(
    id_cols = c(site_cat, family, site),
    names_from = year,
    values_from = avg,
    names_glue = "avg_{year}"
  ) %>%
  # baseline = 2017 if present otherwise 2019
  mutate(
    baseline_value = coalesce(avg_2017, avg_2019)
  ) %>%
  # keep only sites that have both a baseline and 2025
  filter(!is.na(baseline_value), !is.na(avg_2025)) %>%
  # compute paired delta per site
  mutate(delta = avg_2025 - baseline_value) %>%
  # summarize to family × site_cat
  group_by(site_cat, family) %>%
  summarise(
    n_sites    = n(),
    mean_change = mean(delta, na.rm = TRUE),
    sd_change   = sd(delta, na.rm = TRUE),
    se_change   = sd_change / sqrt(n_sites),
    ci_low      = mean_change - 1.96 * se_change,
    ci_high     = mean_change + 1.96 * se_change,
    .groups = "drop"
  )

# quick diagnostic: show how many sites contributed per family × site_cat
print(df_change %>% arrange(site_cat, desc(n_sites)))
## # A tibble: 24 × 8
##    site_cat family     n_sites mean_change sd_change se_change   ci_low  ci_high
##    <chr>    <chr>        <int>       <dbl>     <dbl>     <dbl>    <dbl>    <dbl>
##  1 NDNP     Acroporid…      14  -0.0000904 0.000243  0.0000649 -2.17e-4  3.68e-5
##  2 NDNP     Agariciid…      14  -0.0000734 0.000179  0.0000478 -1.67e-4  2.03e-5
##  3 NDNP     Astrocoen…      14  -0.000614  0.000855  0.000228  -1.06e-3 -1.66e-4
##  4 NDNP     Faviidae        14  -0.0000245 0.000211  0.0000564 -1.35e-4  8.60e-5
##  5 NDNP     Meandrini…      14  -0.0000170 0.0000638 0.0000170 -5.04e-5  1.64e-5
##  6 NDNP     Merulinid…      14  -0.000257  0.000689  0.000184  -6.18e-4  1.04e-4
##  7 NDNP     Millepori…      14   0.000223  0.00136   0.000364  -4.91e-4  9.36e-4
##  8 NDNP     Montastra…      14  -0.00113   0.00359   0.000961  -3.01e-3  7.51e-4
##  9 NDNP     Oculinidae      14   0         0         0          0        0      
## 10 NDNP     Pocillopo…      14   0.0000960 0.000278  0.0000742 -4.95e-5  2.41e-4
## # ℹ 14 more rows
# --- forest plot: one point per family per panel (site_cat) ---
p <- ggplot(df_change %>% 
              filter(site_cat == "NEMMA") %>%
              # group_by(site_cat) %>% 
              mutate(family = fct_reorder(family, mean_change)),
       aes(x = mean_change, y = family)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
  geom_pointrange(aes(xmin = ci_low, xmax = ci_high), size = 0.2) +
  # facet_wrap(~ site_cat, scales = "free_y") +
  scale_x_continuous(labels = label_percent(),
                     # breaks = c(-0.01, 0.00, 0.01),
                     limits = c(-.018, .018)) +
  theme_bw(base_size = 12) +
  labs(
    x = "Change in percent cover",
    y = "Family")

print(p)

ggsave(plot = p, here("agrra_monitoring", "figs", "coral_forest.png"), width = 4, height = 4)

Coral recruits

Data overview: - Historical: CoralRecruitsBySite.xlsx - family level - remove “DICH” code (not in 2025) - 2025: BenthicCoralRecruis.xlxs - species level - missing site-level standard deviation in data

Import + wrangle data

# using coral_meta_spp key generated in prev. section

# historical data

recruits_hist_sized <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "CoralRecruits", "CoralRecruitsBySite.xlsx"), sheet = "Small") %>% 
  mutate (size = "small") %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "CoralRecruits", "CoralRecruitsBySite.xlsx"), sheet = "Large") %>% 
          mutate (size = "large")) %>%
  rename_with(fix_names) %>%
  mutate(year = year(date)) %>%
  filter(year %in% c(2017, 2019)) %>% # 2022 was incomplete NPA year
  pivot_longer(
    cols = matches(".*_(avg|std)$"),   # all columns ending in avg or std
    names_to = c("family_code", ".value"),        # split into species and measurement type
    names_sep = "_"
  ) %>%
  select(site, year, family_code, size, avg) 

# adding totals because above is only small vs. large
recruits_hist_tot <- recruits_hist_sized %>%
  group_by(site, year, family_code) %>%
  summarize(
    avg = sum(avg, na.rm = TRUE),
    .groups = "drop"
  ) %>%
  mutate(size = "total") %>%
  select(site, year, family_code, size, avg) 

recruits_hist <- bind_rows(recruits_hist_tot, recruits_hist_sized)

# 2025 data - switch to "Overall" sheet #########

recruits_2025 <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "BenthicCoralRecruits.xlsx"), sheet = "Overall") %>%
  bind_rows(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "BenthicCoralRecruits.xlsx"), sheet = "Overall")) %>%
  rename_with(fix_names) %>%
  mutate(year = 2025) %>%
  rename(site = name) %>%
  select(year, site, matches("_(small|large|total)$")) %>%
  pivot_longer(
    cols = matches("_(small|large|total)$"),
    names_to = c("family_code", "size"),
    names_pattern = "^(.*?)_(small|large|total)$",
    values_to = "avg"
  ) %>%
  select(site, year, family_code, size, avg) %>%
  mutate(family_code = str_remove(family_code, "^t_"))
# no SD for 2025

# merge for complete df
recruits_fam_site <- bind_rows(recruits_hist, recruits_2025) %>%
  mutate(site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA")),
         site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site),
         site = if_else(site == "Little Bird Patch", "Little Bird Backreef", site),
         family_code = toupper(family_code)) %>%
  left_join(coral_meta_spp %>%
              select(family_code, family) %>%
              distinct(), 
            by = "family_code") %>%
  mutate(family = if_else(family_code == "ALL", "All species", 
                          if_else(family_code == "CARB", "Unknown spp.", family))) %>%
  filter(family_code != "DICH") %>% # in historical data only, none present  
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
  ungroup()
  

recruits_fam_year <- recruits_fam_site %>%
  group_by(year, site_cat, family_code, family, size) %>%
  summarize(mean = mean(avg), 
            sd = sd(avg)) %>%
  group_by(year, site_cat) %>%
  mutate(rel_mean = mean / sum(mean)) %>%
  group_by(year, site_cat, family_code, family, size) %>%
  filter(sum(mean) > 0) %>% # removing species that were 0 in all years/site_cats
  ungroup()

Line plots by site

ggplot(recruits_fam_site %>%
         filter(family_code %in% c("ALL") & size == "total"), 
       aes(x = year, y = avg, color = site)) +
  geom_point() +
  geom_line() + 
  labs(x = "Year", y = "Recruit density (indv/m2)", color = "Location") +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

ggplot(recruits_fam_site %>%
         filter(family_code != "ALL" & size == "total"), 
       aes(x = year, y = avg, color = site)) +
  geom_point() +
  geom_line() + 
  labs(x = "Year", y = "Recruit density (indv/m2)", color = "Location") +
  facet_wrap(~ family) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Box plots by year

recruits_tot_site <- recruits_fam_site %>%
  filter(family_code == "ALL" & size == "total")

p1 <- make_bp(recruits_tot_site, "NEMMA",
        yvar = "avg", ylab = expression("Coral recruits (indv. m"^{-2}*")"),
        y_max = 16,
        title = TRUE) +
  scale_y_continuous()

p2 <- make_bp(recruits_tot_site, "NDNP",
        yvar = "avg", ylab = expression("Coral recruits (indv. m"^{-2}*")"),
        y_max = 16,
        title = TRUE,
        remove_y_text = FALSE) +
  scale_y_continuous()

final_plot <- (p1 | p2)
final_plot

ggsave(here("agrra_monitoring", "figs", "recruits.png"), plot = final_plot, width = 6, height = 4)

Diadema

Data overview: - 2025: MotileInverts.xlsx - 2017-2019: MotileInvertsBySite.xlsx

Import + wrangle data

diadema_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "MotileInverts", "MotileInvertsBySite.xlsx"), sheet = "Data") %>% 
  rename_with(fix_names) %>%
  mutate(year = year(date)) %>%
  select(site, year, matches("^dt_|^da_|^dj_"))

diadema_2025 <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "MotileInverts.xlsx"), sheet = "Overall") %>% 
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "MotileInverts.xlsx"), sheet = "Overall")) %>%
  rename_with(fix_names) %>%
  mutate(year = 2025,
         name = if_else(name == "Little Bird Patch", "Little Bird Backreef", name)) %>%
  select(site = name, year, matches("^dt_|^da_|^dj_"), -ends_with("_med"))

diadema <- rbind(diadema_hist, diadema_2025) %>%
  filter(year != 2022) %>% # incomplete year for NPA
  mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA sites that have site numbers with different name variations afterwards
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA"))) %>%
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
  ungroup()

diadema_site <- diadema %>%
  select(site_cat, site, year, dt_avg, da_avg, dj_avg) %>%  # keep density columns
  pivot_longer(
    cols = c(dt_avg, da_avg, dj_avg),
    names_to = "stage",
    values_to = "density"
  ) %>%
  mutate(
    stage = recode(stage,
                   dt_avg = "Total",
                   da_avg = "Adult",
                   dj_avg = "Juvenile"))

diadema_year <- diadema %>%
  select(site_cat, year, dt_avg, da_avg, dj_avg) %>%  # keep density columns
  pivot_longer(
    cols = c(dt_avg, da_avg, dj_avg),
    names_to = "stage",
    values_to = "density"
  ) %>%
  mutate(
    stage = recode(stage,
                   dt_avg = "Total",
                   da_avg = "Adult",
                   dj_avg = "Juvenile")) %>%
  group_by(site_cat, year, stage) %>%
  summarise(
    mean_density = mean(density, na.rm = TRUE),
    sd_density   = sd(density, na.rm = TRUE),
    .groups = "drop"
  )

Bar plots

ggplot(diadema_year, aes(x = factor(year), y = mean_density, fill = stage)) +
  geom_col(position = "dodge") +
  geom_errorbar(aes(ymin = mean_density - sd_density, ymax = mean_density + sd_density),
                position = position_dodge(width = 0.9), width = 0.2) +
  facet_wrap(~site_cat) +
  labs(x = "Year", y = "Urchin Density (mean ± SD)", fill = "Stage") +
  theme_minimal(base_size = 14)

Line plots

ggplot(diadema_site %>% filter(stage %in% c("Adult", "Juvenile")), aes(x = factor(year), y = density, group = site)) +
  geom_line() +
  geom_point(alpha = .5) +
  facet_grid(stage ~ site_cat) +
  labs(x = "Year", y = "Diadema density", fill = "Stage") +
  theme_bw(base_size = 14)

Delta plots

Fish

Data overview:

Notes + questions:

  • To include commercially significant species, would have to go back to raw 2025 data or do custom calculations by family

Biomass

Data overview:

  • 2017: FishBiomassByTransect.xlsx
    • “Data” sheet has transect-level data by family and group
  • 2025: FishBiomassByTransect.xlsx
    • “Overall” sheet has transect-level data by family
    • Family sheets have transect-level data by species

Import + wrangle data

# create compatible family key by merging metadata
fish_fam_hist_temp <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishBiomass", "FishBiomassBySite.xlsx"), sheet = "Key") %>% 
  clean_names() %>%
  slice(21:40) %>%
  select(code_com = 1,
         definition = 2) %>%
  mutate(family = str_remove(word(definition, 1), ":"),
         family = if_else(family == "Wrasse", "Wrasses", 
                          if_else(code_com == "PUFF", "Pufferfishes", family))) %>%
  select(-definition)

fish_fam_2025_temp <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishBiomass.xlsx"), sheet = "Metadata") %>% 
  clean_names() %>%
  slice(23:42) %>%
  select(code_lat = 2,
         definition = 3) %>%
  mutate(family = str_remove(word(definition, 1), ":")) %>%
  select(-definition)

fish_fam_key <- full_join(fish_fam_hist_temp, fish_fam_2025_temp, by = "family") %>%
  mutate(code_lat = tolower(gsub("^t", "", code_lat)),
         code_com = tolower(code_com)) %>%
  select(code_com, code_lat, family) %>%
  bind_rows(
    tibble(code_com = "t",
           code_lat = "all",
           family   = "Total"))

# import data
fish_bm_hist_temp <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishBiomass", "FishBiomassBySite.xlsx"), sheet = "Data") %>% 
  rename_with(fix_names) %>%
  mutate(year = year(date)) %>%
  filter(nt >= 8) %>% # keeping only sites with 8 or more transects
  select(site, year, contains("avg"), contains("std")) %>%
  pivot_longer(
    cols = -c(site, year),  
    names_to = "family_stat",      
    values_to = "value"                 
  ) %>%
  separate(family_stat, into = c("code_com", "stat"), sep = "_(?=[^_]+$)") %>%
  left_join(fish_fam_key, by = "code_com") %>%
  select(site, year, code = code_lat, family, stat, value)

fish_bm_2025_temp <- rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishBiomass.xlsx"), sheet = "Overall"),
                      read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishBiomass.xlsx"), sheet = "Overall")) %>%
  # deal with weird capitalization in species col names
  rename_with(fix_names) %>%
  filter(nt >= 8) %>% # keeping only sites with 8 or more transects
  mutate(year = 2025,
         site = if_else(name == "Little Bird Patch", "Little Bird Backreef", name)) %>%
  select(site, year, contains("avg"), contains("std")) %>%
  rename_with(~ gsub("^t_", "", .x), .cols = -c(site, year)) %>%
  pivot_longer(
    cols = -c(site, year),  
    names_to = "family_stat",      
    values_to = "value"                 
  ) %>%
  separate(family_stat, into = c("code_lat", "stat"), sep = "_(?=[^_]+$)") %>%
  left_join(fish_fam_key, by = "code_lat") %>%
  select(site, year, code = code_lat, family, stat, value)

# merge datasets
fish_bm_site_temp <- rbind(fish_bm_hist_temp, fish_bm_2025_temp) %>%
 mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA site inconsistencies
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                            "NEMMA"))) %>%
  mutate(year = if_else(year == 2024, 2025, year)) %>% # one erroneous 2024 date
  filter(year != 2022,
         !is.na(family)) %>% # 2022 was an incomplete data collection year for NPA
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # keeping only sites with over time comparisons
  ungroup()

herb_bm_site_temp <- fish_bm_site_temp %>%
  filter(code %in% c("scar", "acan")) %>%
  group_by(site, site_cat, year, stat) %>%
  summarise(
    value = if_else(stat == "avg", sum(value, na.rm = TRUE), 
                    sqrt(sum(value^2, na.rm = TRUE))), # for std
    .groups = "drop"
  ) %>%
  mutate(family = "Herbivores", code = "herb") %>%
  distinct()

fish_bm_site <- rbind(fish_bm_site_temp, herb_bm_site_temp) %>%
  pivot_wider(
    names_from = stat,   # use 'avg' and 'std' as new column names
    values_from = value  # fill those columns with the numeric values
  ) %>%
  unnest(c(avg, std)) # make numeric instead of list
# wide version in benthic ~ fish section

fish_bm_year <- fish_bm_site %>%
  group_by(year, site_cat, family, code) %>%
  summarize(mean = mean(avg),
            std = sd(avg)) %>%
  # don't actually need this complete() here, but including as precaution
  group_by(year, site_cat) %>%
  complete(
    family = unique(fish_bm_site$family),
    fill = list(mean = 0)) %>% # list(mean = 0, rel_mean = 0)
  ungroup()

# wide format for graphing purposes
fish_bm_year_wide <- fish_bm_year %>%
  select(-code) %>%
  pivot_wider(names_from = family,
              values_from = c(mean, std),
              names_glue = "{family}_{.value}")

# clean environment and export
# rm(list = ls(pattern = "_temp$"))
write.csv(fish_bm_year, here("agrra_monitoring", "data_outputs", "fish_bm_year.csv"))

Line plots

ggplot(fish_bm_site %>% filter(code == "all"), 
       aes(x = year, y = avg, group = site, color = site)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Total fish biomass (g/100m2)", color = "Site", group = "Site") +
  theme_minimal()

ggplot(fish_bm_site %>% filter(code == "scar"), 
       aes(x = year, y = avg, group = site, color = site)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Scarid biomass (g/100m2)", color = "Site", group = "Site") +
  theme_minimal()

ggplot(fish_bm_site %>% filter(code == "acan"), 
       aes(x = year, y = avg, group = site, color = site)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Acanthurid biomass (g/100m2)", color = "Site", group = "Site") +
  theme_minimal()

ggplot(fish_bm_site %>% filter(code == "serr"), 
       aes(x = year, y = avg, group = site, color = site)) +
  geom_point() +
  geom_line() +
  labs(x = "Year", y = "Serranid biomass (g/100m2)", color = "Site", group = "Site") +
  theme_minimal()

ggplot(fish_bm_year, aes(x = year, y = mean, group = site_cat, color = site_cat)) +
  geom_point() +
  geom_line() + 
  labs(x = "Year", y = "Biomass", color = "Location") +
  facet_wrap(~ family) +
  theme_bw() +
  theme(axis.text.x = element_text(angle = 45, hjust = 1))

Radar plots

NEMMA

data <- fish_bm_year_wide %>%
  filter(site_cat == "NEMMA") %>%
  column_to_rownames(var = "year") %>%
  select(contains("_mean") & 
           !contains("rel_mean") & 
           !contains(c("Total", "Herbivores")) &
           where(~ any(. >= 100))) %>%
  rename_with(~ gsub("_mean", "", .x))

max <- data %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol <- ncol(data)

rdr <- 
  rbind(rep(max, ncol), 
        rep(0, ncol), 
        data)
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

# doesn't like adding legend within rmarkdown - but can try within knit document
legend(
  x = "topright",
  legend = rownames(rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

NDNP

data <- fish_bm_year_wide %>%
  filter(site_cat == "NDNP") %>%
  column_to_rownames(var = "year") %>%
  select(contains("_mean") & 
           !contains("rel_mean") & 
           !contains(c("Total", "Herbivores")) &
           where(~ any(. >= 100))) %>% # at least one mean bm value must be over 100
  rename_with(~ gsub("_mean", "", .x))

max <- data %>%
  unlist() %>%
  max(na.rm = TRUE)

ncol <- ncol(data)

rdr <- 
  rbind(rep(max, ncol), 
        rep(0, ncol), 
        data)
  
line_colors <- c("coral", "skyblue4")
fill_colors <- alpha(line_colors, 0.5)

radarchart(rdr,
           pcol = line_colors,
           pfcol = fill_colors,
           cglcol = "grey")

legend(
  x = "topright",
  legend = rownames(rdr)[3:4],  # skip max/min rows
  col = line_colors,
  lty = 1,
  lwd = 2,
  bty = "n",                   # no box
  cex = 0.8,
  pt.cex = 1.5,
  fill = fill_colors            # show fill colors in legend
)

Density

Import + wrangle data

# import data
fish_den_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishDensity", "FishDensityBySite.xlsx"), sheet = "Data") %>% 
  rename_with(fix_names) %>%
  mutate(year = year(date)) %>%
  filter(nt >= 8) %>% # keeping only sites with 8 or more transects
  select(site, year, contains("avg"), contains("std")) %>%
  pivot_longer(
    cols = -c(site, year),  
    names_to = "family_stat",      
    values_to = "value"                 
  ) %>%
  separate(family_stat, into = c("code_com", "stat"), sep = "_(?=[^_]+$)") %>%
  left_join(fish_fam_key, by = "code_com") %>%
  select(site, year, code = code_lat, family, stat, value)

fish_den_2025 <- rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishDensity.xlsx"), sheet = "Overall"),
                      read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishDensity.xlsx"), sheet = "Overall")) %>%
  # deal with weird capitalization in species col names
  rename_with(fix_names) %>%
  filter(nt >= 8) %>% # keeping only sites with 8 or more transects
  mutate(year = 2025,
         site = if_else(name == "Little Bird Patch", "Little Bird Backreef", name)) %>%
  select(site, year, contains("avg"), contains("std")) %>%
  rename_with(~ gsub("^t_", "", .x), .cols = -c(site, year)) %>%
  pivot_longer(
    cols = -c(site, year),  
    names_to = "family_stat",      
    values_to = "value"                 
  ) %>%
  separate(family_stat, into = c("code_lat", "stat"), sep = "_(?=[^_]+$)") %>%
  left_join(fish_fam_key, by = "code_lat") %>%
  select(site, year, code = code_lat, family, stat, value)

# merge datasets
fish_den_site <- rbind(fish_den_hist, fish_den_2025) %>%
 mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA site inconsistencies
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                            "NEMMA"))) %>%
  mutate(year = if_else(year == 2024, 2025, year)) %>% # one erroneous 2024 date
  filter(year != 2022,
         !is.na(family)) %>% # 2022 was an incomplete data collection year for NPA
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # keeping only sites with over time comparisons
  ungroup() %>%
  pivot_wider(
    names_from = stat,   # use 'avg' and 'std' as new column names
    values_from = value  # fill those columns with the numeric values
  ) %>%
  unnest(c(avg, std)) # make numeric instead of list

Forest plots

# calculating paired changes per site, and 95% confidence intervals

# --- compute paired deltas per site, then summarize per family × site_cat ---
df_change <- fish_den_site %>%
  filter(year %in% c(2017, 2019, 2025)) %>%
  # pivot so each site has avg_2017, avg_2019, avg_2025 (NA where missing)
  pivot_wider(
    id_cols = c(site_cat, family, site),
    names_from = year,
    values_from = avg,
    names_glue = "avg_{year}"
  ) %>%
  # baseline = 2017 if present otherwise 2019
  mutate(
    baseline_value = coalesce(avg_2017, avg_2019)
  ) %>%
  # keep only sites that have both a baseline and 2025
  filter(!is.na(baseline_value), !is.na(avg_2025)) %>%
  # compute paired delta per site
  mutate(delta = avg_2025 - baseline_value) %>%
  # summarize to family × site_cat
  group_by(site_cat, family) %>%
  summarise(
    n_sites    = n(),
    mean_change = mean(delta, na.rm = TRUE),
    sd_change   = sd(delta, na.rm = TRUE),
    se_change   = sd_change / sqrt(n_sites),
    ci_low      = mean_change - 1.96 * se_change,
    ci_high     = mean_change + 1.96 * se_change,
    .groups = "drop"
  )

# quick diagnostic: show how many sites contributed per family × site_cat
print(df_change %>% arrange(site_cat, desc(n_sites)))
## # A tibble: 42 × 8
##    site_cat family       n_sites mean_change sd_change se_change  ci_low ci_high
##    <chr>    <chr>          <int>       <dbl>     <dbl>     <dbl>   <dbl>   <dbl>
##  1 NDNP     Angelfishes       14    -0.285       0.391    0.105  -0.490  -0.0804
##  2 NDNP     Barracudas        14     0.00516     0.127    0.0340 -0.0614  0.0717
##  3 NDNP     Boxfishes         14    -0.106       0.149    0.0397 -0.184  -0.0281
##  4 NDNP     Butterflyfi…      14    -1.42        1.47     0.393  -2.19   -0.647 
##  5 NDNP     Chubs             14    -0.0643      0.214    0.0571 -0.176   0.0477
##  6 NDNP     Damselfishes      14    -0.475       0.672    0.180  -0.828  -0.123 
##  7 NDNP     Filefishes        14    -0.348       0.392    0.105  -0.554  -0.143 
##  8 NDNP     Groupers          14    -6.64        2.85     0.762  -8.14   -5.15  
##  9 NDNP     Grunts            14    -3.97        5.65     1.51   -6.93   -1.02  
## 10 NDNP     Jacks             14    -0.151       0.777    0.208  -0.558   0.256 
## # ℹ 32 more rows
# --- forest plot: one point per family per panel (site_cat) ---
p <- ggplot(df_change %>% group_by(site_cat) %>% mutate(family = fct_reorder(family, mean_change)),
       aes(x = mean_change, y = family)) +
  geom_vline(xintercept = 0, linetype = "dashed", color = "gray60") +
  geom_pointrange(aes(xmin = ci_low, xmax = ci_high), size = 0.2) +
  facet_wrap(~ site_cat, scales = "free_y") +
  theme_bw(base_size = 12) +
  labs(
    x = expression("Change in density (indv. 100m"^{-2}*")"),
    y = "Family")

ggsave(plot = p, here("agrra_monitoring", "figs", "fish_den_forest.png"), width = 6, height = 4)

Length

Data overview:

  • 2017-2022: FishSizeFreqByTransect.xlsx (binned data only)
    • NF (number of fish) + raw numbers and % in each size bin
    • % is out of 100
    • Everything above 40cm gets aggregated
    • One sheet per family has transect-level size data
  • 2025: FishSizeFreqByTransect.xlsx (binned data only)
    • NF (number of fish) + % in each size bin
    • % is out of one (proportion)
    • Over 40cm, bins are by 10cm to the nearest 10cm until <200cm
    • “Overall” sheet has transect-level size data
    • Family sheets have data by family

Notes + questions:

  • Dealing with >40cm fish given aggregation into one group in historical data? Need to check raw data
  • Correct to exclude transects with no fish and include in mean size calculations?
  • Weighted means: calculate midpoint representation of each size bin and weight by the number of counts per bin

Scarids

Import + wrangle data

# starting with scarids only as mock-up
scar_length_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishSizeFreq", "FishSizeFreqByTransect.xlsx"), sheet = "PARR") %>%
  clean_names() %>%
  filter(!is.na(site)) %>%
  mutate(year = year(date),
         across(
    .cols = contains("percent_"),
    .fns  = ~ .x / 100)) %>%
  select(site, year, transect = trans, nf, contains("percent_")) %>%
  rename_with(~ str_remove_all(.x, "percent_")) %>%
  rename("41_up" = "40") %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSCAR") %>%
          clean_names() %>%
          mutate(year = 2025,
                 survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name)) %>%
          select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
          rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
          rowwise() %>% 
          mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>% # select only columns with digits (e.g., 50, 60) and sum across these to be consistent with 2017 data, should not affect most of our fish families of interest
          ungroup() %>%
          select(!matches("^\\d+$"))) %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSCAR") %>%
          clean_names() %>%
          mutate(year = 2025) %>%
          select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
          rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
          rowwise() %>% 
          mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>%
          ungroup() %>%
          select(!matches("^\\d+$"))) %>%
  mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA sites that have site numbers with different name variations afterwards
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                            "NEMMA"))) %>%
  filter(year != 2022) %>% # 2022 was an incomplete data collection year for NPA
  group_by(site, year) %>%
  filter(n_distinct(transect) >= 8) %>% # keeping only sites with at least 8 transects per year
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
  ungroup()

# check that all transects are present even if no fish per family were recorded - should have 0 entries if all is well
# chk <- full_join(scar_length_trans %>%
#                    group_by(year, site) %>%
#                    summarize(count = n()), 
#                  fish_bm_trans %>%
#                    group_by(year, site) %>%
#                    summarize(count = n()), 
#                  by = c("year", "site"), suffix = c("_length", "_bm")) %>%
#   filter(count_length != count_bm | is.na(count_length) | is.na(count_bm))

# transform data for summaries
scar_length_long <- scar_length_trans %>%
  filter(nf != 0) %>%
  pivot_longer(
    cols = matches("^\\d+_?\\d?"), 
    names_to = "length_mid", 
    values_to = "percent"
  ) %>%
  mutate(
    # convert bin names like "0_5" -> 2.5
    length_mid = str_extract(length_mid, "\\d+_?\\d*"),
    length_mid = ifelse(str_detect(length_mid, "_"),
                        sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
                        as.numeric(length_mid))) %>%
  group_by(year, site, transect) %>%
  mutate(prop = percent / sum(percent)) %>% # accounts for small deviations where total proportions don't equal exactly 1
  ungroup() %>%
  select(-percent)

scar_length_site <- scar_length_long %>%
  group_by(year, site, site_cat, length_mid) %>%
  summarise(mean_prop = mean(prop), .groups = "drop")

scar_length_year <- scar_length_site %>%
  group_by(year, site_cat, length_mid) %>%
  summarise(mean_prop = mean(mean_prop), .groups = "drop")

Density plots

ggplot(scar_length_year %>% 
         mutate(year = as.character(year)), 
       aes(x = length_mid, weight = mean_prop, group = year, color = year, fill = year)) +
  geom_density(alpha = 0.3, adjust = 1) +
  facet_wrap(~ site_cat, ncol = 2) +
  labs(x = "Fish length (cm) - Scaridae", y = "Density") +
  theme_minimal()

Chi-square tests

# transform data for summaries
scar_length_long_binned <- scar_length_trans %>%
  filter(nf != 0) %>%
  pivot_longer(
    cols = matches("^\\d+_?\\d?"), 
    names_to = "size_bin", 
    values_to = "percent"
  ) %>%
  rename(nf_tot = nf) %>%
  mutate(nf_bin = nf_tot * percent)

scar_counts <- scar_length_long_binned %>%
  mutate(size_bin = case_when(
    size_bin %in% c("21_30", "31_40", "41_up") ~ "21_up",
    TRUE ~ size_bin)) %>%
  group_by(year, site_cat, size_bin) %>%
  summarise(count = sum(nf_bin), .groups = "drop")
NEMMA
nemma_table <- scar_counts %>%
  filter(site_cat == "NEMMA") %>%
  select(year, size_bin, count) %>%
  pivot_wider(names_from = year, values_from = count, values_fill = 0)

chisq_result_nemma <- chisq.test(as.matrix(nemma_table[,-1]))
chisq_result_nemma
## 
##  Pearson's Chi-squared test
## 
## data:  as.matrix(nemma_table[, -1])
## X-squared = 81.011, df = 3, p-value < 2.2e-16
NDNP
ndnp_table <- scar_counts %>%
  filter(site_cat == "NDNP") %>%
  select(year, size_bin, count) %>%
  pivot_wider(names_from = year, values_from = count, values_fill = 0)

chisq_result_ndnp <- chisq.test(as.matrix(ndnp_table[,-1]))
chisq_result_ndnp
## 
##  Pearson's Chi-squared test
## 
## data:  as.matrix(ndnp_table[, -1])
## X-squared = 15.239, df = 3, p-value = 0.001623

Mosaic plots

tab <- scar_counts %>%
  filter(site_cat == "NEMMA") %>%
  mutate(size_bin = factor(size_bin, levels = c("0_5","6_10","11_20","21_up"))) %>%
  arrange(size_bin) %>%
  pivot_wider(names_from = year, values_from = count, values_fill = 0) %>%
  select(-site_cat)

# convert to matrix, drop the size_bin column
mat <- as.matrix(tab %>% select(-size_bin))
row.names(mat) <- tab$size_bin

mosaicplot(mat, 
           color = TRUE,           # color each year
           main = paste("Fish size distribution by year —", "NEMMA"),
           xlab = "Size Bin",
           ylab = "Year",
           las = 1)                # rotate y-axis labels

tab <- scar_counts %>%
  filter(site_cat == "NDNP") %>%
  mutate(size_bin = factor(size_bin, levels = c("0_5","6_10","11_20","21_up"))) %>%
  arrange(size_bin) %>%
  pivot_wider(names_from = year, values_from = count, values_fill = 0) %>%
  select(-site_cat)

# convert to matrix, drop the size_bin column
mat <- as.matrix(tab %>% select(-size_bin))
row.names(mat) <- tab$size_bin

mosaicplot(mat, 
           color = TRUE,           # color each year
           main = paste("Fish size distribution by year —", "NDNP"),
           xlab = "Size Bin",
           ylab = "Year",
           las = 1)                # rotate y-axis labels

Phase changes

Currently only 2025 data, because appears that 2017/2019 have no phase data…?

# raw historical data

fish_tax_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "FishTaxonomy", skip = 1, col_names = TRUE) %>% # deals with merged headers
  clean_names() %>%
  unite(species, genus, species, sep = " ", remove = TRUE)

fish_raw_hist <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-FishRaw.xlsx"), sheet = "Counts Raw") %>%
  clean_names() %>%
  left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Transect") %>%
              clean_names() %>%
              rename(transect = id, date = surveyed) %>%
              left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Survey") %>%
                          clean_names() %>%
                          select(survey = id, site = name))) %>%
  filter(!is.na(site)) %>%
  mutate(year = year(date),
         size_class = str_replace_all(size_class, "-", "_"),
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA")),
         # convert bin names like "0_5" -> 2.5
         length_mid = str_extract(size_class, "\\d+_?\\d*"),
         length_mid = ifelse(str_detect(length_mid, "_"),
                             sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
                             as.numeric(length_mid))) %>%
  left_join(fish_tax_hist, by = c("taxonomy" = "id")) %>%
  select(year, site, site_cat, trans = transect, size_class, length_mid, common_name, common_family, family, species) # no terminal phase in this data?

# raw 2025 data

fish_tax_2025 <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "Metadata.xlsx"), sheet = "FishTaxonomy", skip = 1, col_names = TRUE) %>% # deals with merged headers
  clean_names() %>%
  unite(species, genus, species, sep = " ", remove = TRUE)
# could replace NA species with 'spp.' before uniting

chk <- fish_tax_hist %>% # make sure taxonomic lookups are consistent
  full_join(fish_tax_2025, by = "id", suffix = c("_hist", "_2025")) %>%
  mutate(match = species_hist == species_2025)

fish_raw_2025 <- read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "FishRaw.xlsx"), sheet = "Counts Raw") %>%
  clean_names() %>%
  left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "Metadata.xlsx"), sheet = "Transect") %>%
              clean_names() %>%
              select(transect = id, survey, date = surveyed) %>%
              left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Raw", "Metadata.xlsx"), sheet = "Survey") %>%
                          clean_names() %>%
                          select(survey = id, site = name))) %>%
  bind_rows(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Raw", "FishRaw.xlsx"), sheet = "Counts Raw") %>%
              clean_names() %>%
              left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Raw", "Metadata.xlsx"), sheet = "Transect") %>%
                          clean_names() %>%
                          select(transect = id, survey, date = surveyed) %>%
                          left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Raw", "Metadata.xlsx"), sheet = "Survey") %>%
                                      clean_names() %>%
                                      select(survey = id, site = name)))
  ) %>%
  clean_names() %>%
  mutate(year = year(date),
         size_class = str_replace_all(size_class, " - ", "_"),
         size_class = str_replace_all(size_class, "cm", ""),
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                                    "NEMMA")),
         # convert bin names like "0_5" -> 2.5
         length_mid = str_extract(size_class, "\\d+_?\\d*"),
         length_mid = ifelse(str_detect(length_mid, "_"),
                             sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
                             as.numeric(length_mid))) %>%
  left_join(fish_tax_2025, by = c("taxonomy" = "id")) %>%
  select(year, site, site_cat, trans = transect, size_class, length_mid, common_name, common_family, family, species, terminal_phase)

# merge all years

# fish_raw <- bind_rows(fish_raw_hist, fish_raw_2025)

# scarids only

scarids <- fish_raw_2025 %>%
  filter(species %in% c("Sparisoma viride", "Sparisoma aurofrenatum", "Scarus vetula", "Scarus iseri")) %>%
  mutate(sex = case_when(terminal_phase == "No" ~ "Female",
                         terminal_phase == "Yes" ~ "Male"),
         size_class = factor(size_class, levels = c("0_5","6_10","11_20","21_30", "31_40", "41_50")))

ggplot(scarids, aes(x = length_mid, fill = sex)) +
  geom_histogram(stat = "count", alpha = 0.8, position = position_dodge()) +
  facet_wrap(vars(species), ncol = 4) +
  geom_vline(data = filter(scarids, species == "Sparisoma viride"), aes(xintercept = 18, color = "Minimum length at maturity"), linetype = "dashed") +
  geom_vline(data = filter(scarids, species == "Scarus vetula"), aes(xintercept = 19, color = "Minimum length at maturity"), linetype = "dashed") +
  geom_vline(data = filter(scarids, species == "Sparisoma aurofrenatum"), aes(xintercept = 15, color = "Minimum length at maturity"), linetype = "dashed") +
  geom_vline(data = filter(scarids, species == "Scarus iseri"), aes(xintercept = 19, color = "Minimum length at maturity"), linetype = "dashed") +
  scale_color_manual(values = c("black")) +
  scale_fill_manual(values=c("pink2", "turquoise3")) +
  theme_bw() +
  labs(x = "Fish length (cm)", y = "Number observed", fill="") +
  theme(strip.text = element_text(face = "italic"),
        legend.title = element_blank(),
        legend.position = "top",
        axis.text.x = element_text(angle = 45, hjust = 1))

Serranids

Import + wrangle data

# starting with scarids only as mock-up
serr_length_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishSizeFreq", "FishSizeFreqByTransect.xlsx"), sheet = "GROU") %>%
  clean_names() %>%
  filter(!is.na(site)) %>%
  mutate(year = year(date),
         across(
    .cols = contains("percent_"),
    .fns  = ~ .x / 100)) %>%
  select(site, year, transect = trans, nf, contains("percent_")) %>%
  rename_with(~ str_remove_all(.x, "percent_")) %>%
  rename("41_up" = "40") %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSERR") %>%
          clean_names() %>%
          mutate(year = 2025,
                 survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name)) %>%
          select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
          rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
          rowwise() %>% 
          mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>% # select only columns with digits (e.g., 50, 60) and sum across these to be consistent with 2017 data, should not affect most of our fish families of interest
          ungroup() %>%
          select(!matches("^\\d+$"))) %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishSizeFreqByTransect.xlsx"), sheet = "tSERR") %>%
          clean_names() %>%
          mutate(year = 2025) %>%
          select(site = survey_name, year, transect = transect_name, nf, starts_with("x")) %>%
          rename_with(~ str_remove_all(.x, "^(x)|(cm$)")) %>%
          rowwise() %>% 
          mutate("41_up" = sum(c_across(matches("^\\d+$")), na.rm = TRUE)) %>%
          ungroup() %>%
          select(!matches("^\\d+$"))) %>%
  mutate(site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA sites that have site numbers with different name variations afterwards
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                            "NEMMA"))) %>%
  filter(year != 2022) %>% # 2022 was an incomplete data collection year for NPA
  group_by(site, year) %>%
  filter(n_distinct(transect) >= 8) %>% # keeping only sites with at least 8 transects per year
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # only keeping sites with over time comparisons
  ungroup()

# check that all transects are present even if no fish per family were recorded - should have 0 entries if all is well
# chk <- full_join(serr_length_trans %>%
#                    group_by(year, site) %>%
#                    summarize(count = n()), 
#                  fish_bm_trans %>%
#                    group_by(year, site) %>%
#                    summarize(count = n()), 
#                  by = c("year", "site"), suffix = c("_length", "_bm")) %>%
#   filter(count_length != count_bm | is.na(count_length) | is.na(count_bm))

# transform data for summaries
serr_length_long <- serr_length_trans %>%
  filter(nf != 0) %>%
  pivot_longer(
    cols = matches("^\\d+_?\\d?"), 
    names_to = "length_mid", 
    values_to = "percent"
  ) %>%
  mutate(
    # convert bin names like "0_5" -> 2.5
    length_mid = str_extract(length_mid, "\\d+_?\\d*"),
    length_mid = ifelse(str_detect(length_mid, "_"),
                        sapply(str_split(length_mid, "_"), function(x) mean(as.numeric(x))),
                        as.numeric(length_mid))) %>%
  group_by(year, site, transect) %>%
  mutate(prop = percent / sum(percent)) %>% # accounts for small deviations where total proportions don't equal exactly 1
  ungroup() %>%
  select(-percent)

serr_length_site <- serr_length_long %>%
  group_by(year, site, site_cat, length_mid) %>%
  summarise(mean_prop = mean(prop), .groups = "drop")

serr_length_year <- serr_length_site %>%
  group_by(year, site_cat, length_mid) %>%
  summarise(mean_prop = mean(mean_prop), .groups = "drop")

Density plots

ggplot(serr_length_year %>% 
         mutate(year = as.character(year)), 
       aes(x = length_mid, weight = mean_prop, group = year, color = year, fill = year)) +
  geom_density(alpha = 0.3, adjust = 1) +
  facet_wrap(~ site_cat, ncol = 2) +
  labs(x = "Fish length (cm) - Serranidae", y = "Density") +
  theme_minimal()

Herbivores

# diadema_site
# fish_bm_site

# herb_fam_site <- 

Benthic ~ Fish

Integrate datasets

benth_fish_site <- fish_bm_site %>%
  select(-family) %>%
  rename(mean = avg) %>%
  pivot_wider(names_from = code,
              values_from = c(mean, std),
              names_glue = "{code}_{.value}") %>%
  left_join(benthic_cover_site, by = c("site", "site_cat", "year"))

Exploratory models and plots

No statistically significant relationships found (including when scarid and acanthurids are aggregated)

mod <- lm(fma_mean ~ scar_mean, data = benth_fish_site)
summary(mod)
## 
## Call:
## lm(formula = fma_mean ~ scar_mean, data = benth_fish_site)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.23116 -0.11016 -0.01080  0.08115  0.38401 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.083e-01  3.988e-02   7.730 3.71e-09 ***
## scar_mean   -3.646e-05  2.748e-05  -1.327    0.193    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1393 on 36 degrees of freedom
## Multiple R-squared:  0.04664,    Adjusted R-squared:  0.02016 
## F-statistic: 1.761 on 1 and 36 DF,  p-value: 0.1928
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]

ggplot(benth_fish_site, aes(x = scar_mean, y = fma_mean)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
  labs(x = "Scarid biomass", y = "FMA cover") +
  annotate("text", x = Inf, y = Inf,
           label = paste0("R² = ", round(r2, 2), 
                          "\nP = ", signif(pval, 2)),
           hjust = 1.1, vjust = 1.2, size = 4) +
  theme_bw()

mod <- lm(fma_mean ~ herb_mean, data = benth_fish_site)
summary(mod)
## 
## Call:
## lm(formula = fma_mean ~ herb_mean, data = benth_fish_site)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.20618 -0.10757 -0.01420  0.07757  0.41048 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  3.203e-01  4.543e-02   7.051 2.82e-08 ***
## herb_mean   -2.266e-05  1.607e-05  -1.410    0.167    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.1389 on 36 degrees of freedom
## Multiple R-squared:  0.05235,    Adjusted R-squared:  0.02602 
## F-statistic: 1.989 on 1 and 36 DF,  p-value: 0.1671
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]

ggplot(benth_fish_site, aes(x = herb_mean, y = fma_mean)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
  labs(x = "Herbivore biomass", y = "FMA cover") +
  annotate("text", x = Inf, y = Inf,
           label = paste0("R² = ", round(r2, 2), 
                          "\nP = ", signif(pval, 2)),
           hjust = 1.1, vjust = 1.2, size = 4) +
  theme_bw()

mod <- lm(lc_mean ~ scar_mean, data = benth_fish_site)
summary(mod)
## 
## Call:
## lm(formula = lc_mean ~ scar_mean, data = benth_fish_site)
## 
## Residuals:
##      Min       1Q   Median       3Q      Max 
## -0.05273 -0.02691 -0.01161  0.01497  0.15619 
## 
## Coefficients:
##              Estimate Std. Error t value Pr(>|t|)    
## (Intercept) 4.675e-02  1.209e-02   3.869 0.000442 ***
## scar_mean   6.717e-06  8.326e-06   0.807 0.425141    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04221 on 36 degrees of freedom
## Multiple R-squared:  0.01776,    Adjusted R-squared:  -0.009529 
## F-statistic: 0.6508 on 1 and 36 DF,  p-value: 0.4251
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]

ggplot(benth_fish_site, aes(x = scar_mean, y = lc_mean)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
  labs(x = "Scarid biomass", y = "Live coral cover") +
  annotate("text", x = Inf, y = Inf,
           label = paste0("R² = ", round(r2, 2), 
                          "\nP = ", signif(pval, 2)),
           hjust = 1.1, vjust = 1.2, size = 4) +
  theme_bw()

mod <- lm(lc_mean ~ herb_mean, data = benth_fish_site)
summary(mod)
## 
## Call:
## lm(formula = lc_mean ~ herb_mean, data = benth_fish_site)
## 
## Residuals:
##       Min        1Q    Median        3Q       Max 
## -0.045407 -0.028903 -0.009291  0.014008  0.156349 
## 
## Coefficients:
##               Estimate Std. Error t value Pr(>|t|)    
## (Intercept)  5.741e-02  1.392e-02   4.124  0.00021 ***
## herb_mean   -1.069e-06  4.924e-06  -0.217  0.82931    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## Residual standard error: 0.04256 on 36 degrees of freedom
## Multiple R-squared:  0.001308,   Adjusted R-squared:  -0.02643 
## F-statistic: 0.04716 on 1 and 36 DF,  p-value: 0.8293
r2 <- summary(mod)$r.squared
pval <- summary(mod)$coefficients[2,4]

ggplot(benth_fish_site, aes(x = herb_mean, y = lc_mean)) +
  geom_point() +
  geom_smooth(method = "lm", se = TRUE, color = "steelblue") +
  labs(x = "Herbivore biomass", y = "Live coral cover") +
  annotate("text", x = Inf, y = Inf,
           label = paste0("R² = ", round(r2, 2), 
                          "\nP = ", signif(pval, 2)),
           hjust = 1.1, vjust = 1.2, size = 4) +
  theme_bw()

Correlation tests

Testing correlation between scarid biomass and benthic variables - nothing is notably strong

benth_vars <- benth_fish_site %>%
  select(lc_mean, cca_mean, ta_mean, tas_mean, fma_mean, cyan_mean, ainv_mean, peys_mean)

cor_results <- cor(benth_vars, benth_fish_site$scar_mean, use = "pairwise.complete.obs")
cor_results
##                  [,1]
## lc_mean    0.13324977
## cca_mean   0.38089582
## ta_mean    0.21085969
## tas_mean  -0.18002397
## fma_mean  -0.21595995
## cyan_mean  0.25669404
## ainv_mean  0.36476851
## peys_mean -0.01637593

Multivariate tests (PERMANOVA)

benth_mat <- benth_fish_site %>%
  select(lc_mean, cca_mean, ta_mean, tas_mean, fma_mean)

adonis2(benth_mat ~ scar_mean + site_cat + year,
        data = benth_fish_site,
        method = "bray",
        permutations = 999,
        by = "term")
## Permutation test for adonis under reduced model
## Terms added sequentially (first to last)
## Permutation: free
## Number of permutations: 999
## 
## adonis2(formula = benth_mat ~ scar_mean + site_cat + year, data = benth_fish_site, permutations = 999, method = "bray", by = "term")
##           Df SumOfSqs      R2       F Pr(>F)    
## scar_mean  1  0.15087 0.05318  5.3525  0.005 ** 
## site_cat   1  1.13209 0.39906 40.1633  0.001 ***
## year       1  0.59559 0.20994 21.1298  0.001 ***
## Residual  34  0.95837 0.33782                   
## Total     37  2.83693 1.00000                   
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
res <- adonis2(benth_mat ~ scar_mean + site_cat + year,
               data = benth_fish_site,
               method = "bray",
               permutations = 999,
               by = "term")

res_df <- broom::tidy(res)

ggplot(res_df, aes(x = reorder(term, R2), y = R2)) +
  geom_col(fill = "skyblue") +
  coord_flip() +
  labs(x = "Predictor", y = "Proportion of variation explained (R²)") +
  theme_bw()

Archive

access_meta_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Transect") %>%
  clean_names() %>% 
  left_join(read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Raw", "MSAccess-Metadata.xlsx"), sheet = "Survey") %>%
              clean_names() %>%
              rename(survey = id)) %>%
  select(transect = id, date = surveyed, site = name)
  


# checking percent that are unknown
chk <- coral_spp_hist %>%
  mutate(category = if_else(species == "unkn", "unknown", "known")) %>%
  group_by(site, year, category) %>%
  summarize(cover = sum(avg))

# old biomass import code - holding for now
fish_bm_trans <- read_excel(here("agrra_monitoring", "data_raw", "ATG_2017-2022", "Calculated", "FishBiomass", "FishBiomassByTransect.xlsx"), sheet = "Data") %>% 
  clean_names() %>%
  filter(!is.na(site)) %>%
  mutate(year = year(date)) %>% 
  select(site, year, transect = trans, t_tot = t, t_scar = parr, t_acan = surg, t_serr = grou) %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NEMMA_2025", "Calculated", "FishBiomassByTransect.xlsx"), sheet = "Overall") %>% 
          clean_names() %>%
          mutate(year = year(surveyed),
                 survey_name = if_else(survey_name == "Little Bird Patch", "Little Bird Backreef", survey_name)) %>% # inconsistency in site name between 2017 and 2025
          select(site = survey_name, year, transect = transect_name, t_tot = all, t_scar, t_acan, t_serr)) %>%
  rbind(read_excel(here("agrra_monitoring", "data_raw", "ATG_NPA_2025", "Calculated", "FishBiomassByTransect.xlsx"), sheet = "Overall") %>% 
          clean_names() %>%
          mutate(year = year(surveyed)) %>%
          select(site = survey_name, year, transect = transect_name, t_tot = all, t_scar, t_acan, t_serr)) %>%
  mutate(t_herb = t_scar + t_acan,
         site = if_else(str_detect(site, "^Site \\d+"), 
                        str_extract(site, "^Site \\d+"), 
                        site), # dealing with NPA sites that have site numbers with different name variations afterwards
         site_cat = if_else(str_detect(site, "^Site \\d+"), "NDNP",
                            if_else(str_detect(site, "Redonda"), "Redonda",
                            "NEMMA"))) %>%
  mutate(year = if_else(year == 2024, 2025, year)) %>% # Site 11 had one erroneous 2024 date
  filter(year != 2022) %>% # 2022 was an incomplete data collection year for NPA
  group_by(site, year) %>%
  filter(n_distinct(transect) >= 8) %>% # keeping only sites with at least 8 transects per year
  group_by(site) %>%
  filter(n_distinct(year) > 1) %>% # keeping only sites with over time comparisons
  ungroup()

fish_bm_site <- fish_bm_trans %>%
  group_by(site, site_cat, year) %>%
  rename_with(~ str_remove(.x, "t_")) %>%
  summarise(across(c(tot, herb, scar, acan, serr),
                   list(mean = ~ mean(.x, na.rm = TRUE),
                        se   = ~ se(.x)),
                   .names = "{.col}_{.fn}")) %>%
  ungroup()

fish_bm_year <- fish_bm_site %>%
  select(!contains("_se")) %>%
  rename_with(~ str_remove(.x, "_mean")) %>%
  group_by(site_cat, year) %>%
  summarise(across(c(tot, herb, scar, acan, serr),
                   list(mean = ~ mean(.x, na.rm = TRUE),
                        se   = ~ se(.x)),
                   .names = "{.col}_{.fn}")) %>%
  ungroup()

# importing and merging coral spp sheets for 2025
sheets <- excel_sheets(file_coral)
sheets_to_join <- setdiff(sheets, c("TermsOfUse", "Metadata", "Overall")) # exclude two sheets before merging remainder
sheets_list <- map(sheets_to_join, ~ read_excel(file_coral, sheet = .x) %>%
                     rename_with(~ gsub("([A-Za-z]{4})(avg|med|std)$", "\\1_\\2", .x)) %>% 
                     rename_with(~ gsub("^(t)([A-Za-z]{4})(ave|med|std)$", "\\1_\\2_\\3", .x)) %>% 
                     clean_names())

coral_spp_2025 <- reduce(sheets_list, left_join, by = c("survey_id", "code", "name", "nt"))


# benthic ~ fish by year
benth_fish_year <- benth_fish_site %>%
  select(-contains("_se")) %>%
  rename_with(~ str_remove(.x, "_mean$")) %>%
  group_by(year, site_cat) %>%
  summarise(
    across(
      where(is.numeric),
      list(mean = ~mean(.x, na.rm = TRUE),
           se   = ~se(.x)),
      .names = "{.col}_{.fn}"
    ),
    .groups = "drop"
  )

# benthic NMDS NEMMA only
benthic_nmds <- benthic_cover_site %>%
  filter(site_cat == "NEMMA") %>%
  select(-site_cat, 
         -contains("_sd"), 
         -contains("cyan"),
         -contains("peys")) %>%
  rename_with(~ gsub("_mean", "", .x)) %>%
  mutate(year = as.character(year))

benthic_meta <- benthic_nmds %>% ungroup %>% select(site, year)
benthic_pc   <- benthic_nmds %>% ungroup %>% select(-site, -year)

set.seed(123) # fixes random number generator so results are reproducible
benthic_nmds_ord <- metaMDS(benthic_pc, distance = "bray", k = 2, trymax = 100, trace = 0)

site_scores <- scores(benthic_nmds_ord, display = "sites") %>%
  as.data.frame() %>%
  mutate(site = benthic_meta$site,
         year = benthic_meta$year)

pc_scores <- scores(benthic_nmds_ord, display = "species") %>%
  as.data.frame() %>%
  rownames_to_column(var = "category")

arrow_mult <- 2
pc_scores_scaled <- pc_scores %>%
  mutate(NMDS1 = NMDS1 * arrow_mult,
         NMDS2 = NMDS2 * arrow_mult)

pc_scores_scaled <- pc_scores_scaled %>%
  mutate(
    label_nudge_x = ifelse(NMDS1 >= 0, 0.05, -0.05),
    label_nudge_y = ifelse(NMDS2 >= 0, 0.05, -0.05)
  )

ggplot(site_scores, aes(x = NMDS1, y = NMDS2, color = year)) +
  geom_point(size = 4) +
  scale_shape_manual(values = 0:25) +
  stat_ellipse(aes(group = year), type = "t", linetype = 2) +
  geom_segment(data = pc_scores_scaled,
               aes(x = 0, y = 0, xend = NMDS1, yend = NMDS2),
               arrow = arrow(length = unit(0.3, "cm")),
               color = "black",
               inherit.aes = FALSE) +
  geom_text(data = pc_scores_scaled,
            aes(x = NMDS1 + label_nudge_x,
                y = NMDS2 + label_nudge_y,
                label = category),
            color = "black",
            inherit.aes = FALSE) +
  theme_minimal()

set.seed(123)
adonis_res <- adonis2(benthic_pc ~ year, 
                      data = benthic_meta, 
                      method = "bray", 
                      permutations = 999)

print(adonis_res)